read AST data - from NCBI

ast<-read_delim("Ecoli_AST.tsv.gz") %>% rename(BioSample=`#BioSample`)

length(unique(ast$BioSample))
## [1] 9094
ast %>% group_by(BioSample) %>% summarise(n=length(unique((Antibiotic))))%>% pull(n) %>% summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   14.00   15.00   15.33   18.00   36.00

read AST data - from BSAC paper

# matrix, one row per strain
# Moradigaravand 2018, https://doi.org/10.1371/journal.pcbi.1006258
# AST in Table S1, parsed to add BioSample via ENA
bsac<-read_delim("BSAC_AST.txt")

dim(bsac)
## [1] 1936   19
# check if data was in NCBI already (no)
sum(bsac$BioSample %in% ast$BioSample)
## [1] 0
ast_bsac <- bsac %>% select(BioSample,CTZ:CIP) %>%
  rename(amoxicillin=AMX,
         `amoxicillin-clavulanate`=AMC,
         ampicillin=AMP, 
         cephalothin=CET, 
         ceftriaxone=CTX, 
         ceftazidime=CTZ,
         cefuroxime=CXM,
         ciprofloxacin=CIP,
         gentamicin=GEN, 
         `piperacillin-tazobactam`=TZP, 
         trimethoprim=TMP,
         tobramycin=TBM
         ) %>%
  pivot_longer(!BioSample, names_to="Antibiotic", values_to = "SIR") %>%
  mutate(`Resistance phenotype`=case_when(
    SIR=="S" ~ "susceptible",
    SIR=="R" ~ "resistant",
    SIR=="I" ~ "resistant",
    TRUE ~ NA
  )) %>% select(-SIR)

# add to AST data list
ast <- ast %>% select(BioSample, Antibiotic, `Resistance phenotype`) %>% bind_rows(ast_bsac)

read AST data - from Mills paper

# matrix, one row per strain
# Mills 2022, https://doi.org/10.1186/s13073-022-01150-7
# AST in Table S2, parsed to add BioSample via ENA
mills<-read_excel("Mills_AST.xlsx")

dim(mills)
## [1] 2075   40
# check if data was in NCBI already (yes)
sum(mills$BioSample %in% ast$BioSample)
## [1] 2075
ast_mills <- mills %>% select(BioSample,ends_with("call")) %>%
  pivot_longer(!BioSample, names_to="Antibiotic", values_to = "SIR") %>%
  mutate(`Resistance phenotype`=case_when(
    SIR=="S" ~ "susceptible",
    SIR=="R" ~ "resistant",
    SIR=="I" ~ "resistant",
    TRUE ~ NA
  )) %>% select(-SIR) %>%
  mutate(Antibiotic=gsub(" call", "", Antibiotic)) %>%
  mutate(Antibiotic=tolower(Antibiotic))

# compare with NCBI calls
ast_mills %>% left_join(ast, by=c("BioSample", "Antibiotic")) %>% group_by(`Resistance phenotype.x`,`Resistance phenotype.y`) %>% count()
## # A tibble: 5 × 3
## # Groups:   Resistance phenotype.x, Resistance phenotype.y [5]
##   `Resistance phenotype.x` `Resistance phenotype.y`     n
##   <chr>                    <chr>                    <int>
## 1 resistant                intermediate               246
## 2 resistant                resistant                 3133
## 3 resistant                <NA>                       799
## 4 susceptible              susceptible              29821
## 5 susceptible              <NA>                      3351
ast_pub <- bind_rows(ast_mills, ast_bsac)

read ATB AMRfinderplus calls

afp <-read_tsv("ATB_Ecoli_AFP.tsv.gz")
gene_counts <- afp %>% group_by(`Gene symbol`, Class, Subclass) %>% count()

species_calls <- read_tsv("species_calls.tsv.gz")
ecoli <- species_calls %>% filter(Species=="Escherichia coli") %>% pull(Sample)

afp_status <- read_tsv("AMRFP_status.tsv.gz") %>% filter(sample %in% ecoli)
denominator <- afp_status %>% filter(status=="PASS") %>% nrow()

# afp results including null row for each genome that ran but returned no hits
afp_all <- afp_status %>% rename(Name=sample) %>% left_join(afp) %>% filter(status=="PASS")

read Enterobase typing info

ec_types <- read_tsv(file="ec_types_enterobase.tsv") %>% rename(Name=sample_accession)

get AMRfinderplus and AST data where both are available

sum(unique(ast$BioSample) %in% afp$Name)
## [1] 8701
# AST data for strains with AMRfinder run successfully
ast_matched <- ast %>% filter(BioSample %in% afp_status$sample[afp_status$status=="PASS"])

# AMRfinder results, for strains with matching AST data from NCBI
afp_matched <- afp %>% filter(Name %in% ast$BioSample)

# check objects
dim(ast_matched)
## [1] 130946      3
ast_matched %>% pull(BioSample) %>% unique() %>% length()
## [1] 8701
dim(afp_matched)
## [1] 245838      7
afp_matched %>% pull(Name) %>% unique() %>% length()
## [1] 8701

frequency of common core-gene mutations

# number with pmrB_Y358N
(gene_counts %>% filter(`Gene symbol`=="pmrB_Y358N") %>% pull(n))/denominator
## [1] 0.4652073
pmrB_Y358N <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="pmrB_Y358N")

pmrB_Y358N$clermont
## # A tibble: 16 × 4
##    `Clermont Type (ClermonTyping)`     freq     n denom
##    <chr>                              <dbl> <dbl> <int>
##  1 -                               0.333        2     6
##  2 A                               0.295    14713 49862
##  3 B1                              0.976    53783 55130
##  4 B2                              0.000386    14 36252
##  5 C                               0.990     5496  5552
##  6 D                               0.0301     458 15207
##  7 E                               0.898    19666 21893
##  8 E or cladeI                     0.341       56   164
##  9 F                               0.00887     37  4171
## 10 G                               0.100      331  3307
## 11 ND                              0.418      220   526
## 12 Non Escherichia                 0            0     6
## 13 Unknown                         0.0233     336 14446
## 14 albertii                        0.25         1     4
## 15 cladeI                          0            0   653
## 16 fergusonii                      1            1     1
pmrB_Y358N$pathovar
## # A tibble: 19 × 4
##    Pathovar                 freq     n  denom
##    <chr>                   <dbl> <dbl>  <int>
##  1 -                    0.270    32596 120659
##  2 E. albertii          0.25         1      4
##  3 E. coli - EHEC       0.958    35843  37415
##  4 E. coli - EIEC       0.480      240    500
##  5 E. coli - EIEC/EHEC  1            1      1
##  6 E. coli - EIEC/EPEC  0            0      1
##  7 E. coli - EPEC       0.527     3351   6362
##  8 E. coli - ERROR      1            2      2
##  9 E. coli - ETEC       0.440     1439   3269
## 10 E. coli - ETEC/EHEC  0.875      126    144
## 11 E. coli - ETEC/EPEC  0.935       43     46
## 12 E. coli - ETEC/STEC  0.326      238    730
## 13 E. coli - STEC       0.810     9681  11954
## 14 ND                   0.418      220    526
## 15 Shigella             1            2      2
## 16 Shigella boydii      0.559      457    818
## 17 Shigella dysenteriae 0.981      916    934
## 18 Shigella flexneri    0.999     9953   9964
## 19 Shigella sonnei      0.000361     5  13849
pmrB_Y358N$clermont_plot

pmrB_Y358N$pathovar_plot

# number with glpT_E448K 
(gene_counts %>% filter(`Gene symbol`=="glpT_E448K") %>% pull(n))/denominator
## [1] 0.8692433
glpT_E448K <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="glpT_E448K")

glpT_E448K$clermont
## # A tibble: 16 × 4
##    `Clermont Type (ClermonTyping)`  freq     n denom
##    <chr>                           <dbl> <dbl> <int>
##  1 -                               1         6     6
##  2 A                               0.572 28543 49862
##  3 B1                              0.997 54952 55130
##  4 B2                              0.948 34361 36252
##  5 C                               0.981  5448  5552
##  6 D                               0.996 15151 15207
##  7 E                               0.997 21826 21893
##  8 E or cladeI                     0.780   128   164
##  9 F                               0.997  4159  4171
## 10 G                               0.997  3297  3307
## 11 ND                              0.914   481   526
## 12 Non Escherichia                 1         6     6
## 13 Unknown                         0.987 14258 14446
## 14 albertii                        1         4     4
## 15 cladeI                          0.998   652   653
## 16 fergusonii                      1         1     1
glpT_E448K$pathovar
## # A tibble: 19 × 4
##    Pathovar              freq      n  denom
##    <chr>                <dbl>  <dbl>  <int>
##  1 -                    0.831 100296 120659
##  2 E. albertii          1          4      4
##  3 E. coli - EHEC       0.988  36980  37415
##  4 E. coli - EIEC       0.564    282    500
##  5 E. coli - EIEC/EHEC  1          1      1
##  6 E. coli - EIEC/EPEC  1          1      1
##  7 E. coli - EPEC       0.851   5411   6362
##  8 E. coli - ERROR      1          2      2
##  9 E. coli - ETEC       0.728   2381   3269
## 10 E. coli - ETEC/EHEC  0.868    125    144
## 11 E. coli - ETEC/EPEC  0.935     43     46
## 12 E. coli - ETEC/STEC  0.789    576    730
## 13 E. coli - STEC       0.939  11229  11954
## 14 ND                   0.914    481    526
## 15 Shigella             1          2      2
## 16 Shigella boydii      0.996    815    818
## 17 Shigella dysenteriae 1        934    934
## 18 Shigella flexneri    0.999   9959   9964
## 19 Shigella sonnei      0.993  13751  13849
glpT_E448K$clermont_plot

glpT_E448K$pathovar_plot

blaEC

# blaEC from full data, so we get accessions
blaEC <- read_tsv("bla_EC_hits.tsv")

blaEC_summary <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="blaEC")

blaEC_summary$clermont_plot

blaEC_summary$pathovar_plot

# boxplot of identity of hits, stratified by accession of closest ref seq
blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>% 
  mutate(ec_label=paste(`Gene symbol`, `Accession of closest sequence`)) %>% 
  ggplot(aes(x=ec_label, y=`% Identity to reference sequence`)) + geom_boxplot() + coord_flip()

# distribution of alleles per clade
blaEC_alleles_clade <- blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>% 
  ggplot(aes(x=`Clermont Type (ClermonTyping)`, fill=`Accession of closest sequence`)) + geom_bar() + coord_flip()

# distribution of alleles per pathovar
blaEC_alleles_pathovar <- blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>% 
  ggplot(aes(x=Pathovar, fill=`Accession of closest sequence`)) + geom_bar() + coord_flip()

blaEC_alleles_clade + blaEC_alleles_pathovar + plot_layout(guides="collect")

(gene_counts %>% filter(`Gene symbol`=="blaEC") %>% pull(n))/denominator
## [1] 0.9647534

solo markers for ceftriaxone

cef_cephalosporin <- solo_marker_atb(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftriaxone", refgene_subclass = "CEPHALOSPORIN")

cef_cephalosporin$solo_stats
## # A tibble: 78 × 8
##    `Gene symbol` total category      n      p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 ampC_C-11T        5 resistant     0 0      0       0          0    
##  2 ampC_C-42T       35 resistant     3 0.0857 0.0473  0          0.178
##  3 ampC_T-14TGT      1 resistant     0 0      0       0          0    
##  4 ampC_T-32A       56 resistant     4 0.0714 0.0344  0.00397    0.139
##  5 blaCMY            7 resistant     4 0.571  0.187   0.205      0.938
##  6 blaCMY-111        1 resistant     1 1      0       1          1    
##  7 blaCMY-145        1 resistant     1 1      0       1          1    
##  8 blaCMY-17         1 resistant     1 1      0       1          1    
##  9 blaCMY-2        125 resistant   123 0.984  0.0112  0.962      1    
## 10 blaCMY-4          4 resistant     3 0.75   0.217   0.326      1    
## # ℹ 68 more rows
cef_cephalosporin$combined_plot

model for ceftriaxone

cef_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftriaxone", refgene_subclass = "CEPHALOSPORIN")

cef_model <- logistf(resistant ~ ., data=cef_binary)

cef_logreg <- logreg_details(cef_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

cef_logreg

solo markers for fosfomycin

fos_fosfomycin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="fosfomycin", refgene_subclass="FOSFOMYCIN", afp_matched=afp_matched)

fos_fosfomycin$solo_stats
## # A tibble: 6 × 8
##   `Gene symbol` total category           n      p     se ci.lower ci.upper
##   <chr>         <int> <chr>          <int>  <dbl>  <dbl>    <dbl>    <dbl>
## 1 glpT_E448K       35 resistant          0 0      0             0   0     
## 2 uhpT_E350Q        2 resistant          0 0      0             0   0     
## 3 <NA>             35 resistant          1 0.0286 0.0282        0   0.0838
## 4 glpT_E448K       35 nonsusceptible     0 0      0             0   0     
## 5 uhpT_E350Q        2 nonsusceptible     0 0      0             0   0     
## 6 <NA>             35 nonsusceptible     1 0.0286 0.0282        0   0.0838
fos_fosfomycin$combined_plot

model for fosfomycin

fos_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "fosfomycin", refgene_subclass = "FOSFOMYCIN")

fos_model <- logistf(resistant ~ ., data=fos_binary)

fos_logreg <- logreg_details(fos_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

fos_logreg

solo markers for colistin

colistin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="colistin", refgene_subclass="COLISTIN", afp_matched=afp_matched)

colistin$solo_stats
## # A tibble: 10 × 8
##    `Gene symbol` total category           n       p       se ci.lower ci.upper
##    <chr>         <int> <chr>          <int>   <dbl>    <dbl>    <dbl>    <dbl>
##  1 mcr-1.1           1 resistant          0   0       0        0         0    
##  2 mcr-2.1           0 resistant          0 NaN     NaN      NaN       NaN    
##  3 pmrB_E123D       51 resistant          0   0       0        0         0    
##  4 pmrB_Y358N        6 resistant          0   0       0        0         0    
##  5 <NA>             48 resistant          7   0.146   0.0509   0.0460    0.246
##  6 mcr-1.1           1 nonsusceptible     1   1       0        1         1    
##  7 mcr-2.1           0 nonsusceptible     0 NaN     NaN      NaN       NaN    
##  8 pmrB_E123D       51 nonsusceptible    49   0.961   0.0272   0.908     1    
##  9 pmrB_Y358N        6 nonsusceptible     4   0.667   0.192    0.289     1    
## 10 <NA>             48 nonsusceptible    46   0.958   0.0288   0.902     1
colistin$combined_plot

model for colistin

col_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "colistin", refgene_subclass = "COLISTIN")

col_model <- logistf(resistant ~ ., data=col_binary)

col_logreg <- logreg_details(col_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

col_logreg

solo markers for gentamicin vs aminoglycosides

gentamicin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="gentamicin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)

gentamicin_agly$solo_stats
## # A tibble: 36 × 8
##    `Gene symbol` total category      n      p      se ci.lower ci.upper
##    <chr>         <int> <chr>     <int>  <dbl>   <dbl>    <dbl>    <dbl>
##  1 aac(2')-IIa       1 resistant     0 0      0       0          0     
##  2 aac(3)-IId       61 resistant    60 0.984  0.0163  0.952      1     
##  3 aac(3)-IIe       43 resistant    42 0.977  0.0230  0.932      1     
##  4 aac(3)-VIa        3 resistant     1 0.333  0.272   0          0.867 
##  5 aac(6')-Ib        1 resistant     0 0      0       0          0     
##  6 aac(6')-Ib4       6 resistant     0 0      0       0          0     
##  7 aadA1           268 resistant     4 0.0149 0.00741 0.000408   0.0294
##  8 aadA12            1 resistant     0 0      0       0          0     
##  9 aadA16            1 resistant     0 0      0       0          0     
## 10 aadA2            38 resistant     0 0      0       0          0     
## # ℹ 26 more rows
gentamicin_agly$combined_plot

model for gentamicin

gent_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "gentamicin", refgene_class = "AMINOGLYCOSIDE")

gent_model_glm <- glm(resistant ~., family=binomial(link="logit"), data=gent_binary)

gent_logreg_glm <- summary(gent_model_glm)$coef %>% 
    as_tibble(rownames="marker") %>% 
    rename(est=Estimate, pval=`Pr(>|z|)`) %>% 
    select(marker, est, pval) %>%
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient", col="p<0.05")

gent_logreg_glm

gent_model <- logistf(resistant ~ ., data=gent_binary[,colSums(gent_binary)>=20])

gent_logistf <- logreg_details(gent_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

gent_logistf

solo markers for amikacin vs aminoglycosides

amikacin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="amikacin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)

amikacin_agly$solo_stats
## # A tibble: 30 × 8
##    `Gene symbol` total category      n      p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 aac(2')-IIa       1 resistant     0 0      0             0   0     
##  2 aac(3)-IId       25 resistant     0 0      0             0   0     
##  3 aac(3)-IIe       35 resistant     1 0.0286 0.0282        0   0.0838
##  4 aac(6')-Ib        1 resistant     1 1      0             1   1     
##  5 aac(6')-Ib4       5 resistant     0 0      0             0   0     
##  6 aadA1            75 resistant     1 0.0133 0.0132        0   0.0393
##  7 aadA16            1 resistant     0 0      0             0   0     
##  8 aadA2            21 resistant     0 0      0             0   0     
##  9 aadA22            3 resistant     0 0      0             0   0     
## 10 aadA5            74 resistant     3 0.0405 0.0229        0   0.0855
## # ℹ 20 more rows
amikacin_agly$combined_plot

model for amikacin

amk_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "amikacin", refgene_class = "AMINOGLYCOSIDE")

amk_model <- logistf(resistant ~ ., data=amk_binary)

amk_logreg <- logreg_details(amk_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

amk_logreg

solo markers for tobramycin vs aminoglycosides

tobramycin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tobramycin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)

tobramycin_agly$solo_stats
## # A tibble: 34 × 8
##    `Gene symbol` total category      n     p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int> <dbl>  <dbl>    <dbl>    <dbl>
##  1 aac(2')-IIa       1 resistant     0   0    0            0    0    
##  2 aac(3)-IId       44 resistant    NA  NA   NA           NA   NA    
##  3 aac(3)-IIe       32 resistant    NA  NA   NA           NA   NA    
##  4 aac(3)-VIa        0 resistant    NA  NA   NA           NA   NA    
##  5 aac(6')-Ib        1 resistant     1   1    0            1    1    
##  6 aac(6')-Ib4       5 resistant     2   0.4  0.219        0    0.829
##  7 aadA1           117 resistant    NA  NA   NA           NA   NA    
##  8 aadA12            0 resistant    NA  NA   NA           NA   NA    
##  9 aadA16            1 resistant     0   0    0            0    0    
## 10 aadA2            23 resistant    NA  NA   NA           NA   NA    
## # ℹ 24 more rows
tobramycin_agly$combined_plot

model for tobramycin

tob_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "tobramycin", refgene_class = "AMINOGLYCOSIDE")

tob_model <- logistf(resistant ~ ., data=tob_binary)

tob_logreg <- logreg_details(tob_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

tob_logreg

solo markers for ampicillin

ampicillin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ampicillin", refgene_class="BETA-LACTAM", afp_matched=afp_matched)

ampicillin$solo_stats
## # A tibble: 6 × 8
##   `Gene symbol` total category           n       p       se ci.lower ci.upper
##   <chr>         <int> <chr>          <int>   <dbl>    <dbl>    <dbl>    <dbl>
## 1 blaEC          3332 resistant         NA NA      NA        NA       NA     
## 2 blaEC-5         300 resistant         NA NA      NA        NA       NA     
## 3 <NA>           2458 resistant         NA NA      NA        NA       NA     
## 4 blaEC          3332 nonsusceptible   155  0.0465  0.00365   0.0394   0.0537
## 5 blaEC-5         300 nonsusceptible    17  0.0567  0.0133    0.0305   0.0828
## 6 <NA>           2458 nonsusceptible  2408  0.980   0.00285   0.974    0.985
ampicillin$combined_plot

model for ampicillin

amp_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ampicillin", refgene_class = "BETA-LACTAM")

#amp_model <- logistf(resistant ~ ., data=amp_binary[,colSums(amp_binary)>=20])

amp_model_glm <- glm(resistant ~., family=binomial(link="logit"), data=amp_binary)

amp_logreg_glm <- summary(amp_model_glm)$coef %>% 
    as_tibble(rownames="marker") %>% 
    rename(est=Estimate, pval=`Pr(>|z|)`) %>% 
    select(marker, est, pval) %>%
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient", col="p<0.05")

amp_logreg_glm

solo markers for azithromycin

azi <- solo_marker_atb(ast_matched = ast_matched, antibiotic="azithromycin", refgene_subclass="AZITHROMYCIN", afp_matched=afp_matched)

azi$solo_stats
## # A tibble: 6 × 8
##   `Gene symbol` total category           n       p       se  ci.lower ci.upper
##   <chr>         <int> <chr>          <int>   <dbl>    <dbl>     <dbl>    <dbl>
## 1 mef(B)            3 resistant          0 0       0        0          0      
## 2 mph(A)            8 resistant          7 0.875   0.117    0.646      1      
## 3 <NA>           2770 resistant          4 0.00144 0.000722 0.0000299  0.00286
## 4 mef(B)            3 nonsusceptible     0 0       0        0          0      
## 5 mph(A)            8 nonsusceptible     7 0.875   0.117    0.646      1      
## 6 <NA>           2770 nonsusceptible     4 0.00144 0.000722 0.0000299  0.00286
azi$combined_plot

model for azithromycin

azi_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "azithromycin", refgene_subclass = "AZITHROMYCIN", maf=1)

azi_model <- logistf(resistant ~ ., data=azi_binary)

azi_logreg <- logreg_details(azi_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

azi_logreg

solo markers for imipenem

imipenem <- solo_marker_atb(ast_matched = ast_matched, antibiotic="imipenem", refgene_subclass="CARBAPENEM", afp_matched=afp_matched)

imipenem$solo_stats
## # A tibble: 26 × 8
##    `Gene symbol` total category      n     p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int> <dbl>  <dbl>    <dbl>    <dbl>
##  1 blaKPC-2          4 resistant     4 1     0         1        1    
##  2 blaKPC-3         12 resistant     8 0.667 0.136     0.400    0.933
##  3 blaKPC-4          2 resistant     1 0.5   0.354     0        1    
##  4 blaNDM-1         10 resistant     9 0.9   0.0949    0.714    1    
##  5 blaNDM-5          4 resistant     4 1     0         1        1    
##  6 blaNDM-6          1 resistant     1 1     0         1        1    
##  7 blaNDM-7          4 resistant     4 1     0         1        1    
##  8 blaOXA-181        1 resistant     0 0     0         0        0    
##  9 blaOXA-48         1 resistant     1 1     0         1        1    
## 10 ompC_Q171STOP     2 resistant     0 0     0         0        0    
## # ℹ 16 more rows
imipenem$combined_plot

model for imipenem

imp_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "imipenem", refgene_subclass = "CARBAPENEM", maf=5)

imp_model <- logistf(resistant ~ ., data=imp_binary)

imp_logreg <- logreg_details(imp_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

imp_logreg

solo markers for meropenem

meropenem <- solo_marker_atb(ast_matched = ast_matched, antibiotic="meropenem", refgene_subclass="CARBAPENEM", afp_matched=afp_matched)

meropenem$solo_stats
## # A tibble: 26 × 8
##    `Gene symbol` total category      n     p    se ci.lower ci.upper
##    <chr>         <int> <chr>     <int> <dbl> <dbl>    <dbl>    <dbl>
##  1 blaKPC-2          4 resistant     3  0.75 0.217    0.326    1    
##  2 blaKPC-3         12 resistant     6  0.5  0.144    0.217    0.783
##  3 blaKPC-4          2 resistant     0  0    0        0        0    
##  4 blaNDM-1         11 resistant    11  1    0        1        1    
##  5 blaNDM-5          8 resistant     8  1    0        1        1    
##  6 blaNDM-6          1 resistant     1  1    0        1        1    
##  7 blaNDM-7          4 resistant     4  1    0        1        1    
##  8 blaOXA-181        1 resistant     0  0    0        0        0    
##  9 blaOXA-48         1 resistant     1  1    0        1        1    
## 10 ompC_Q171STOP     2 resistant     1  0.5  0.354    0        1    
## # ℹ 16 more rows
meropenem$combined_plot

model for meropenem

mer_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "meropenem", refgene_subclass = "CARBAPENEM", maf=5)

mer_model <- logistf(resistant ~ ., data=mer_binary)

mer_logreg <- logreg_details(mer_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

mer_logreg

solo markers for ceftazidime

ceftazidime_cephalosporin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ceftazidime", refgene_subclass="CEPHALOSPORIN", afp_matched=afp_matched)

ceftazidime_cephalosporin$solo_stats
## # A tibble: 82 × 8
##    `Gene symbol` total category      n      p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int>  <dbl>  <dbl>    <dbl>    <dbl>
##  1 ampC_C-11T        8 resistant     0 0      0        0        0     
##  2 ampC_C-42T       35 resistant     8 0.229  0.0710   0.0895   0.368 
##  3 ampC_G-15GG       2 resistant     0 0      0        0        0     
##  4 ampC_T-14TGT      3 resistant     1 0.333  0.272    0        0.867 
##  5 ampC_T-32A       65 resistant     3 0.0462 0.0260   0        0.0972
##  6 blaCMY            2 resistant     0 0      0        0        0     
##  7 blaCMY-111        1 resistant     1 1      0        1        1     
##  8 blaCMY-145        1 resistant     1 1      0        1        1     
##  9 blaCMY-2        122 resistant   109 0.893  0.0279   0.839    0.948 
## 10 blaCMY-4          4 resistant     3 0.75   0.217    0.326    1     
## # ℹ 72 more rows
ceftazidime_cephalosporin$combined_plot

model for ceftazidime

caz_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftazidime", refgene_subclass = "CEPHALOSPORIN")

caz_model <- logistf(resistant ~ ., data=caz_binary)

caz_logreg <- logreg_details(caz_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

caz_logreg

solo markers for chloramphenicol

chloramphenicol <- solo_marker_atb(ast_matched = ast_matched, antibiotic="chloramphenicol", refgene_class="PHENICOL", afp_matched=afp_matched)

chloramphenicol$solo_stats
## # A tibble: 14 × 8
##    `Gene symbol` total category        n         p        se  ci.lower  ci.upper
##    <chr>         <int> <chr>       <int>     <dbl>     <dbl>     <dbl>     <dbl>
##  1 catA1            17 resistant      17   1         0         1         1      
##  2 catA2             1 resistant       1   1         0         1         1      
##  3 catB3             0 resistant       0 NaN       NaN       NaN       NaN      
##  4 cmlA1            21 resistant      17   0.810     0.0857    0.642     0.977  
##  5 cmlA5             0 resistant       0 NaN       NaN       NaN       NaN      
##  6 floR             84 resistant      83   0.988     0.0118    0.965     1      
##  7 <NA>           2670 resistant      13   0.00487   0.00135   0.00223   0.00751
##  8 catA1            17 nonsuscept…    17   1         0         1         1      
##  9 catA2             1 nonsuscept…     1   1         0         1         1      
## 10 catB3             0 nonsuscept…     0 NaN       NaN       NaN       NaN      
## 11 cmlA1            21 nonsuscept…    18   0.857     0.0764    0.707     1      
## 12 cmlA5             0 nonsuscept…     0 NaN       NaN       NaN       NaN      
## 13 floR             84 nonsuscept…    83   0.988     0.0118    0.965     1      
## 14 <NA>           2670 nonsuscept…    57   0.0213    0.00280   0.0159    0.0268
chloramphenicol$combined_plot

model for chloramphenicol

chloramphenicol_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "chloramphenicol", refgene_class = "PHENICOL")

chloramphenicol_model <- logistf(resistant ~ ., data=chloramphenicol_binary)

chloramphenicol_logreg <- logreg_details(chloramphenicol_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

chloramphenicol_logreg

solo markers for trimethoprim

tmx <- solo_marker_atb(ast_matched = ast_matched, antibiotic="trimethoprim-sulfamethoxazole", refgene_class=c("TRIMETHOPRIM", "SULFONAMIDE"), afp_matched=afp_matched)

tmx$solo_stats
## # A tibble: 26 × 8
##    `Gene symbol` total category      n     p     se ci.lower ci.upper
##    <chr>         <int> <chr>     <int> <dbl>  <dbl>    <dbl>    <dbl>
##  1 dfrA1            32 resistant     9 0.281 0.0795   0.125     0.437
##  2 dfrA12            5 resistant     0 0     0        0         0    
##  3 dfrA14            8 resistant     3 0.375 0.171    0.0395    0.710
##  4 dfrA16            3 resistant     0 0     0        0         0    
##  5 dfrA17           18 resistant    11 0.611 0.115    0.386     0.836
##  6 dfrA5             8 resistant     0 0     0        0         0    
##  7 dfrA7            14 resistant     2 0.143 0.0935   0         0.326
##  8 dfrA8             3 resistant     2 0.667 0.272    0.133     1    
##  9 folP_P64S         1 resistant     0 0     0        0         0    
## 10 sul1            220 resistant     0 0     0        0         0    
## # ℹ 16 more rows
tmx$combined_plot

model for trimethoprim

tmx_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "trimethoprim-sulfamethoxazole", refgene_class = c("TRIMETHOPRIM", "SULFONAMIDE"))

tmx_model <- logistf(resistant ~ ., data=tmx_binary)

tmx_logreg <- logreg_details(tmx_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

tmx_logreg

# solo markers for tetracycline

tet <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tetracycline", refgene_class="TETRACYCLINE", afp_matched=afp_matched)

tet$solo_stats
## # A tibble: 16 × 8
##    `Gene symbol` total category           n      p      se ci.lower ci.upper
##    <chr>         <int> <chr>          <int>  <dbl>   <dbl>    <dbl>    <dbl>
##  1 acrB_R620C        3 resistant          0 0      0         0        0     
##  2 tet(A)          943 resistant        938 0.995  0.00236   0.990    0.999 
##  3 tet(B)          826 resistant        818 0.990  0.00341   0.984    0.997 
##  4 tet(C)           47 resistant         15 0.319  0.0680    0.186    0.452 
##  5 tet(D)           28 resistant         28 1      0         1        1     
##  6 tet(H)            2 resistant          2 1      0         1        1     
##  7 tet(M)            6 resistant          0 0      0         0        0     
##  8 <NA>           3318 resistant        177 0.0533 0.00390   0.0457   0.0610
##  9 acrB_R620C        3 nonsusceptible     0 0      0         0        0     
## 10 tet(A)          943 nonsusceptible   938 0.995  0.00236   0.990    0.999 
## 11 tet(B)          826 nonsusceptible   818 0.990  0.00341   0.984    0.997 
## 12 tet(C)           47 nonsusceptible    43 0.915  0.0407    0.835    0.995 
## 13 tet(D)           28 nonsusceptible    28 1      0         1        1     
## 14 tet(H)            2 nonsusceptible     2 1      0         1        1     
## 15 tet(M)            6 nonsusceptible     0 0      0         0        0     
## 16 <NA>           3318 nonsusceptible   193 0.0582 0.00406   0.0502   0.0661
tet$combined_plot

model for tetracycline

tet_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "tetracycline", refgene_class = "TETRACYCLINE")

tet_model <- logistf(resistant ~ ., data=tet_binary)

tet_logreg <- logreg_details(tet_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

tet_logreg

solo markers for tigecycline

tig <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tigecycline", refgene_subclass="TIGECYCLINE", afp_matched=afp_matched)

tig$solo_stats
## # A tibble: 2 × 8
##   `Gene symbol` total category           n      p      se ci.lower ci.upper
##   <chr>         <int> <chr>          <int>  <dbl>   <dbl>    <dbl>    <dbl>
## 1 <NA>            153 resistant          2 0.0131 0.00918        0   0.0311
## 2 <NA>            153 nonsusceptible     2 0.0131 0.00918        0   0.0311

solo markers for ciprofloxacin

cip <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ciprofloxacin", refgene_subclass="QUINOLONE", afp_matched=afp_matched)

cip$solo_stats
## # A tibble: 48 × 8
##    `Gene symbol`  total category      n       p      se ci.lower ci.upper
##    <chr>          <int> <chr>     <int>   <dbl>   <dbl>    <dbl>    <dbl>
##  1 aac(6')-Ib-cr5     1 resistant     0 0       0        0         0     
##  2 gyrA_D87G          6 resistant     0 0       0        0         0     
##  3 gyrA_D87N          5 resistant     0 0       0        0         0     
##  4 gyrA_D87Y          5 resistant     1 0.2     0.179    0         0.551 
##  5 gyrA_S83A         16 resistant     0 0       0        0         0     
##  6 gyrA_S83L        176 resistant    22 0.125   0.0249   0.0761    0.174 
##  7 gyrA_S83V          1 resistant     0 0       0        0         0     
##  8 marR_S3N         809 resistant     7 0.00865 0.00326  0.00227   0.0150
##  9 parC_A56T          7 resistant     0 0       0        0         0     
## 10 parC_S57T         50 resistant     0 0       0        0         0     
## # ℹ 38 more rows
cip$combined_plot

model for ciprofloxacin

cip_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ciprofloxacin", refgene_class = "QUINOLONE")

cip_model <- logistf(resistant ~ ., data=cip_binary)

cip_logreg <- logreg_details(cip_model) %>% 
  mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
  filter(marker != "(Intercept)") %>%
  mutate(marker=gsub("`","",marker)) %>%
  ggplot(aes(y=marker, col=sig)) + 
  geom_vline(xintercept=0) + 
  geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) + 
  geom_point(aes(x=est)) + 
  scale_color_manual(values=c("grey", "black")) + 
  theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")

cip_logreg